Load data

# setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/vlm-vit-num/analysis")
# Locate every results CSV and read each one into its own tibble.
directory_path <- "../results"
# `pattern` is a regular expression, not a shell glob. The unanchored "*.csv"
# would also match names such as "results.csv.bak", so match the literal
# ".csv" extension at the end of the file name instead.
csv_files <- list.files(
  path = directory_path,
  pattern = "\\.csv$",
  full.names = TRUE
)
csv_list <- csv_files %>%
  map(read_csv)
## New names:
## Rows: 41860 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 78720 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 29120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 47360 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 18460 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 3380 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 8320 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 9880 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 24320 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 3380 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 32640 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 8580 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 15080 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 37120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 21580 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## New names:
## Rows: 53120 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): model_name, image_type, image_1, image_2, numerosity_comparison_type dbl
## (8): ...1, cosine_similarity, numerosity_1, numerosity_2, area_diff, lay...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Stack the per-file tibbles into one long data frame.
df_hf_models <- bind_rows(csv_list)


# Derive predictors and per-model summaries.
# NOTE: every step after `group_by(model_name)` is evaluated within each model,
# so max_layer and the *_z columns are computed per model, and the result is
# left grouped by model_name.
df_hf_models <- df_hf_models %>%
  mutate(
    numerosity_diff = abs(numerosity_2 - numerosity_1),
    log_params = log10(n_params)
  ) %>%
  group_by(model_name) %>%
  mutate(max_layer = max(layer)) %>%
  mutate(model_type = case_when(
    str_detect(model_name, "clip") ~ "VLM",
    TRUE ~ "ViT"
  )) %>%
  # scale() returns a one-column matrix; as.numeric() stores the z-scores as
  # plain numeric vectors instead of matrix columns (values are unchanged).
  mutate(
    cosine_similarity_z = as.numeric(scale(cosine_similarity)),
    numerosity_diff_z = as.numeric(scale(numerosity_diff)),
    area_diff_z = as.numeric(scale(area_diff))
  ) %>%
  # Shortened model label: up to the first four hyphen-separated pieces.
  mutate(model_name2 = str_extract(model_name, "^(?:[^-]*-?){1,3}[^-]*"))


# Number of rows contributed by each model.
table(df_hf_models$model_name)
## 
##              clip-big-giant                  clip-giant 
##                       44100                       73800 
##                clip-huge-14       clip-vit-base-patch32 
##                       89100                       41860 
##      clip-vit-large-patch14  vit-base-patch16-224-in21k 
##                       58000                       11700 
##  vit-huge-patch14-224-in21k vit-large-patch16-224-in21k 
##                       89100                       45000 
## vit-large-patch32-224-in21k 
##                       22500
# Cross-tabulate row counts by model and numerosity comparison type.
table(df_hf_models$model_name, df_hf_models$numerosity_comparison_type)
##                              
##                               different  same
##   clip-big-giant                  22050 22050
##   clip-giant                      36900 36900
##   clip-huge-14                    44550 44550
##   clip-vit-base-patch32           20930 20930
##   clip-vit-large-patch14          29000 29000
##   vit-base-patch16-224-in21k       5850  5850
##   vit-huge-patch14-224-in21k      44550 44550
##   vit-large-patch16-224-in21k     22500 22500
##   vit-large-patch32-224-in21k     11250 11250
# Cross-tabulate row counts by model and image type.
table(df_hf_models$model_name, df_hf_models$image_type)
##                              
##                                dots rectangles
##   clip-big-giant              12740      31360
##   clip-giant                  21320      52480
##   clip-huge-14                25740      63360
##   clip-vit-base-patch32       16900      24960
##   clip-vit-large-patch14      26000      32000
##   vit-base-patch16-224-in21k   3380       8320
##   vit-huge-patch14-224-in21k  25740      63360
##   vit-large-patch16-224-in21k 13000      32000
##   vit-large-patch32-224-in21k  6500      16000

Descriptive analyses

# Bar chart: how many rows fall at each absolute numerosity difference.
ggplot(data = df_hf_models, mapping = aes(x = numerosity_diff)) +
  geom_bar(stat = "count", alpha = .6) +
  labs(x = "Difference in Numerosity") +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )

# Bar chart: how many rows fall at each surface-area difference.
ggplot(data = df_hf_models, mapping = aes(x = area_diff)) +
  geom_bar(stat = "count", alpha = .6) +
  labs(x = "Difference in Surface Area") +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )

# Histogram of raw cosine similarity, one panel per model.
ggplot(data = df_hf_models, mapping = aes(x = cosine_similarity)) +
  geom_histogram(alpha = .6) +
  facet_wrap(~model_name) +
  labs(x = "Cosine Similarity") +
  theme_minimal() +
  theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Analyses

Same vs. Different Numerosity

# Mean cosine similarity and its standard error for every combination of
# model label, comparison type, layer, and image type.
df_summary <- summarise(
  group_by(
    df_hf_models,
    model_name2, numerosity_comparison_type, layer, max_layer, image_type
  ),
  avg_similarity = mean(cosine_similarity, na.rm = TRUE),
  se_similarity = sd(cosine_similarity, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'model_name2',
## 'numerosity_comparison_type', 'layer', 'max_layer'. You can override using the
## `.groups` argument.
# Final-layer similarity for same vs. different numerosity pairs, per model,
# drawn as dodged horizontal bars with +/- 1 SE error bars and one panel per
# image type.
ggplot(
  data = filter(df_summary, layer == max_layer),
  mapping = aes(
    x = model_name2,
    y = avg_similarity,
    fill = numerosity_comparison_type
  )
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.5),
    width = .6
  ) +
  geom_errorbar(
    aes(
      ymin = avg_similarity - se_similarity,
      ymax = avg_similarity + se_similarity
    ),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  labs(
    x = "Model",
    y = "Average Cosine Similarity",
    fill = "",
    color = ""
  ) +
  theme_minimal() +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE) +
  scale_y_continuous(n.breaks = 3) +
  theme(
    axis.title = element_text(size = rel(1.2)),
    axis.text = element_text(size = rel(1.2)),
    legend.text = element_text(size = rel(1.2)),
    strip.text.x = element_text(size = rel(1.2)),
    legend.position = "bottom"
  ) +
  facet_wrap(~image_type)

# Mixed model over all layers: standardized cosine similarity predicted by
# area difference and same-vs-different numerosity, each interacting with
# layer, with random intercepts for both images and for model.
# NOTE(review): the fit reported below is singular with ~0 variance for
# model_name. The response is z-scored within model upstream, which presumably
# leaves little between-model variance -- confirm whether (1 | model_name)
# should stay in this model.
m1 <- lmer(
  cosine_similarity_z ~ area_diff_z * layer +
    numerosity_comparison_type * layer +
    (1 | image_1) + (1 | image_2) +
    (1 | model_name),
  data = df_hf_models
)
## boundary (singular) fit: see help('isSingular')
## Warning: Model failed to converge with 1 negative eigenvalue: -3.3e+09
# Print the random- and fixed-effect estimates for the model fitted above.
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## cosine_similarity_z ~ area_diff_z * layer + numerosity_comparison_type *  
##     layer + (1 | image_1) + (1 | image_2) + (1 | model_name)
##    Data: df_hf_models
## 
## REML criterion at convergence: 905586.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -15.0363  -0.3486   0.0627   0.4737   5.3165 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev. 
##  image_1    (Intercept) 3.504e-01 5.919e-01
##  image_2    (Intercept) 3.351e-01 5.789e-01
##  model_name (Intercept) 4.271e-31 6.536e-16
##  Residual               3.821e-01 6.181e-01
## Number of obs: 475160, groups:  image_1, 1974; image_2, 1845; model_name, 9
## 
## Fixed effects:
##                                        Estimate Std. Error         df  t value
## (Intercept)                           7.001e-01  2.165e-02  3.752e+03   32.342
## area_diff_z                           7.525e-01  8.799e-03  2.999e+04   85.517
## layer                                -6.703e-02  1.266e-04  4.719e+05 -529.214
## numerosity_comparison_typesame        1.601e-01  1.729e-02  4.213e+03    9.261
## area_diff_z:layer                     6.234e-03  8.855e-05  4.703e+05   70.401
## layer:numerosity_comparison_typesame  2.492e-02  1.784e-04  4.702e+05  139.677
##                                      Pr(>|t|)    
## (Intercept)                            <2e-16 ***
## area_diff_z                            <2e-16 ***
## layer                                  <2e-16 ***
## numerosity_comparison_typesame         <2e-16 ***
## area_diff_z:layer                      <2e-16 ***
## layer:numerosity_comparison_typesame   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_df_ layer  nmrs__ ar_d_:
## area_diff_z -0.020                            
## layer       -0.078 -0.067                     
## nmrsty_cmp_ -0.445  0.111  0.092              
## ar_dff_z:ly  0.014 -0.349 -0.079 -0.039       
## lyr:nmrst__  0.058 -0.018 -0.707 -0.143  0.098
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Continuous function of numerosity

# Shortened model label (up to the first four hyphen-separated pieces), used
# for facet titles.
df_hf_models <- mutate(
  df_hf_models,
  model_name2 = str_extract(model_name, "^(?:[^-]*-?){1,3}[^-]*")
)

# Final-layer cosine similarity against numerosity difference, with a linear
# fit, one panel per model ordered by n_params.
ggplot(
  data = filter(df_hf_models, layer == max_layer),
  mapping = aes(
    x = numerosity_diff,
    y = cosine_similarity,
    color = model_type
  )
) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm") +
  facet_wrap(~reorder(model_name2, n_params)) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Numerosity Difference",
    y = "Cosine Similarity",
    fill = "",
    color = "Model Type"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 12),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'

### Run lm by layer
# For every (layer, model_name) cell, regress standardized cosine similarity on
# standardized numerosity and area differences, then keep each predictor's
# coefficient and standard error together with the model R^2.
results <- df_hf_models %>%
  # mutate(numerosity_diff = scale(numerosity_diff),
  #       cosine_similarity = scale(cosine_similarity)) %>%
  dplyr::group_by(layer, model_name) %>%
  dplyr::summarise(
    # Fit the regression once per group and reuse it for both the coefficient
    # table and R^2, instead of estimating the same model twice.
    fit = list(
      lm(cosine_similarity_z ~ numerosity_diff_z + area_diff_z,
         data = dplyr::cur_data())
    ),
    model_summary = list(broom::tidy(fit[[1]])),
    r_squared = summary(fit[[1]])$r.squared
  ) %>%
  dplyr::mutate(
    numerosity_diff_z_coef = purrr::map_dbl(model_summary, ~ .x %>% dplyr::filter(term == "numerosity_diff_z") %>% dplyr::pull(estimate)),
    numerosity_diff_z_se = purrr::map_dbl(model_summary, ~ .x %>% dplyr::filter(term == "numerosity_diff_z") %>% dplyr::pull(std.error)),
    area_diff_z_coef = purrr::map_dbl(model_summary, ~ .x %>% dplyr::filter(term == "area_diff_z") %>% dplyr::pull(estimate)),
    area_diff_z_se = purrr::map_dbl(model_summary, ~ .x %>% dplyr::filter(term == "area_diff_z") %>% dplyr::pull(std.error))
  ) %>%
  # Drop the list columns (fits and tidy tables); keep only the scalar results.
  dplyr::select(layer, model_name, numerosity_diff_z_coef, numerosity_diff_z_se, area_diff_z_coef, area_diff_z_se, r_squared)
## `summarise()` has grouped output by 'layer'. You can override using the
## `.groups` argument.
# Numerosity coefficient by layer for each model: dotted line for the
# estimate, ribbon for +/- 1 SE.
results %>%
  ggplot(aes(x = layer, y = numerosity_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") +  # Coefficient estimate per layer
  geom_ribbon(aes(ymin = numerosity_diff_z_coef - numerosity_diff_z_se,
                  ymax = numerosity_diff_z_coef + numerosity_diff_z_se),
              alpha = 0.5,
              color = NA) +  # Shading for SE
  labs(
    title = "",
    x = "Layer",
    y = "Coefficient (Numerosity)",
    fill = ""
  ) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, max(results$layer)),
                     breaks = seq(0, max(results$layer), 4)) +
  theme(text = element_text(size = 12),
        legend.position = "none") +
  # Only `fill` is mapped here, so the mako palette must be set on the fill
  # scale; a colour scale has no mapped aesthetic to act on in this plot.
  scale_fill_viridis(option = "mako", discrete = TRUE)

# Area coefficient by layer for each model: dotted line for the estimate,
# ribbon for +/- 1 SE.
results %>%
  ggplot(aes(x = layer, y = area_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") +  # Coefficient estimate per layer
  geom_ribbon(aes(ymin = area_diff_z_coef - area_diff_z_se,
                  ymax = area_diff_z_coef + area_diff_z_se),
              alpha = 0.5,
              color = NA) +  # Shading for SE
  labs(
    title = "",
    x = "Layer",
    y = "Coefficient (Area)",
    fill = ""
  ) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, max(results$layer)),
                     breaks = seq(0, max(results$layer), 4)) +
  theme(text = element_text(size = 12),
        legend.position = "none") +
  # Only `fill` is mapped here, so the mako palette must be set on the fill
  # scale; a colour scale has no mapped aesthetic to act on in this plot.
  scale_fill_viridis(option = "mako", discrete = TRUE)

# Mixed model over all layers with numerosity difference as a continuous
# predictor: area and numerosity differences each interact with layer, with
# random intercepts for both images, model, and image type. Fitted with the
# bobyqa optimizer.
m1 <- lmer(
  cosine_similarity_z ~ area_diff_z * layer + numerosity_diff_z * layer +
    (1 | image_1) + (1 | image_2) + (1 | model_name) + (1 | image_type),
  data = df_hf_models,
  control = lmerControl(optimizer = "bobyqa")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00222338 (tol = 0.002, component 1)
# Print the random- and fixed-effect estimates for the model fitted above.
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity_z ~ area_diff_z * layer + numerosity_diff_z *  
##     layer + (1 | image_1) + (1 | image_2) + (1 | model_name) +  
##     (1 | image_type)
##    Data: df_hf_models
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 809613.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -16.0285  -0.3442   0.1194   0.4822   5.4859 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  image_1    (Intercept) 0.15688  0.3961  
##  image_2    (Intercept) 0.13469  0.3670  
##  model_name (Intercept) 0.18004  0.4243  
##  image_type (Intercept) 0.05501  0.2345  
##  Residual               0.31328  0.5597  
## Number of obs: 475160, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                           Estimate Std. Error         df  t value Pr(>|t|)    
## (Intercept)              9.291e-01  2.184e-01  2.987e+00    4.253   0.0240 *  
## area_diff_z             -8.657e-02  9.515e-03  5.275e+03   -9.098   <2e-16 ***
## layer                   -6.161e-02  8.577e-05  4.724e+05 -718.327   <2e-16 ***
## numerosity_diff_z        1.450e-02  6.493e-03  3.135e+03    2.234   0.0256 *  
## area_diff_z:layer        5.074e-03  7.988e-05  4.714e+05   63.523   <2e-16 ***
## layer:numerosity_diff_z -2.041e-02  8.125e-05  4.747e+05 -251.230   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_df_ layer  nmrs__ ar_d_:
## area_diff_z -0.012                            
## layer       -0.005  0.000                     
## nmrsty_dff_  0.005 -0.152  0.000              
## ar_dff_z:ly  0.002 -0.227  0.000  0.036       
## lyr:nmrst__  0.000 -0.015  0.000 -0.162 -0.051
## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00222338 (tol = 0.002, component 1)
### Layer depth ratio

# Numerosity coefficient against relative layer depth, per model. Depth is
# layer / max(layer) within each model, cut into 10 rank-based bins with
# ntile() and rescaled to 0.1-1.0.
# NOTE(review): when a model has more than 10 layers, several layers share one
# prop_binned value, so the line and ribbon get multiple y values at the same
# x -- confirm this un-aggregated view is intended.
results %>%
  group_by(model_name) %>%
  mutate(max_layer = max(layer),
         prop_layer = layer / max_layer) %>%
  mutate(binned_prop_layer = ntile(prop_layer, 10)) %>%
  mutate(prop_binned = binned_prop_layer / 10) %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef, fill = model_name)) +
  geom_line(linetype = "dotted") +  # Coefficient estimate per depth bin
  geom_ribbon(aes(ymin = numerosity_diff_z_coef - numerosity_diff_z_se,
                  ymax = numerosity_diff_z_coef + numerosity_diff_z_se),
              alpha = 0.5,
              color = NA) +  # Shading for SE
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = ""
  ) +
  theme_minimal() +
  theme(text = element_text(size = 15),
        legend.position = "none") +
  # Only `fill` is mapped here, so the mako palette must be set on the fill
  # scale; a colour scale has no mapped aesthetic to act on in this plot.
  scale_fill_viridis(option = "mako", discrete = TRUE)

# Numerosity coefficient against binned relative layer depth, averaged within
# each depth bin per model: line = bin mean, ribbon = mean +/- SE across the
# layers in that bin.
results %>%
  group_by(model_name) %>%
  mutate(max_layer = max(layer),
         prop_layer = layer / max_layer) %>%
  mutate(binned_prop_layer = ntile(prop_layer, 10)) %>%
  mutate(prop_binned = binned_prop_layer / 10) %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef)) +
  stat_summary(
    aes(group = model_name,
        color = model_name),
    fun = mean,
    geom = "line",
    size = 2
  ) +
  stat_summary(
    aes(group = model_name,
        fill = model_name),
    fun.data = mean_se,
    geom = "ribbon",
    alpha = 0.2,
    color = NA
  ) +
  theme_minimal() +
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = "",
    color = ""
  ) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  # Give the ribbons the same palette as the lines; without a fill scale they
  # fall back to the default palette and no longer match their line.
  scale_fill_viridis(option = "mako", discrete = TRUE) +
  theme(text = element_text(size = 15),
        legend.position = "none")

# Numerosity coefficient against binned relative layer depth, averaged per
# model type (VLM vs. ViT): line = bin mean, ribbon = mean +/- SE.
results %>%
  mutate(model_type = case_when(
    str_detect(model_name, "clip") ~ "VLM",
    TRUE ~ "ViT")) %>%
  # Relative depth must be computed per model, not per model type: taking
  # max(layer) across a whole type means shallower models of that type never
  # reach depth 1 and their layers land in the wrong depth bins.
  group_by(model_name) %>%
  mutate(max_layer = max(layer),
         prop_layer = layer / max_layer) %>%
  mutate(binned_prop_layer = ntile(prop_layer, 10)) %>%
  mutate(prop_binned = binned_prop_layer / 10) %>%
  ungroup() %>%
  ggplot(aes(x = prop_binned, y = numerosity_diff_z_coef)) +
  stat_summary(
    aes(group = model_type,
        color = model_type),
    fun = mean,
    geom = "line",
    size = 2
  ) +
  stat_summary(
    aes(group = model_type,
        fill = model_type),
    fun.data = mean_se,
    geom = "ribbon",
    alpha = 0.2,
    color = NA
  ) +
  theme_minimal() +
  labs(
    title = "",
    x = "Layer Depth",
    y = "Coefficient (Numerosity)",
    fill = "",
    color = ""
  ) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  # Give the ribbons the same palette as the lines so the legend keys match.
  scale_fill_viridis(option = "mako", discrete = TRUE) +
  theme(text = element_text(size = 15),
        legend.position = "bottom")

VLM vs. ViT

Same vs. Different

# Label each model as VLM (name contains "clip") or ViT and record its deepest
# layer; the frame is left grouped by model_name.
df_hf_models <- df_hf_models %>%
  group_by(model_name) %>%
  mutate(
    model_type = case_when(
      str_detect(model_name, "clip") ~ "VLM",
      TRUE ~ "ViT"
    ),
    max_layer = max(layer)
  )

# Final-layer mean cosine similarity and its standard error for each model
# type x comparison type cell.
df_summary <- summarise(
  group_by(
    filter(df_hf_models, layer == max_layer),
    model_type, numerosity_comparison_type
  ),
  avg_similarity = mean(cosine_similarity, na.rm = TRUE),
  se_similarity = sd(cosine_similarity, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'model_type'. You can override using the
## `.groups` argument.
# Final-layer similarity for same vs. different numerosity pairs by model
# type: dodged points with +/- 2 SE error bars.
ggplot(
  data = df_summary,
  mapping = aes(
    x = factor(model_type),
    y = avg_similarity,
    color = numerosity_comparison_type
  )
) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_errorbar(
    aes(
      ymin = avg_similarity - 2 * se_similarity,
      ymax = avg_similarity + 2 * se_similarity
    ),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  labs(
    x = "Model Type",
    y = "Average Cosine Similarity",
    color = ""
  ) +
  theme_minimal() +
  scale_color_viridis(discrete = TRUE) +
  theme(
    axis.title = element_text(size = rel(1.2)),
    axis.text = element_text(size = rel(1.2)),
    legend.text = element_text(size = rel(1.2)),
    strip.text.x = element_text(size = rel(1.2)),
    legend.position = "bottom"
  )

### NOTE: Interaction is robust to multiverse analysis
# Final-layer model: raw cosine similarity predicted by area difference and
# comparison type (each interacting with model type) plus patch size
# interacting with comparison type, with random intercepts for both images,
# image type, and model.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_comparison_type * model_type +
    patch_size * numerosity_comparison_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## cosine_similarity ~ area_diff * model_type + numerosity_comparison_type *  
##     model_type + patch_size * numerosity_comparison_type + (1 |  
##     image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58156.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9686 -0.4350  0.0188  0.4888  5.2955 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  image_1    (Intercept) 0.004167 0.06455 
##  image_2    (Intercept) 0.003527 0.05939 
##  model_name (Intercept) 0.001066 0.03265 
##  image_type (Intercept) 0.002045 0.04522 
##  Residual               0.001165 0.03413 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                                Estimate Std. Error         df
## (Intercept)                                   8.629e-01  4.651e-02  3.676e+00
## area_diff                                    -2.396e-05  3.928e-06  3.214e+03
## model_typeVLM                                 2.281e-02  2.238e-02  6.301e+00
## numerosity_comparison_typesame                5.772e-02  2.667e-03  7.327e+03
## patch_size                                    1.265e-04  1.507e-03  6.005e+00
## area_diff:model_typeVLM                       1.519e-05  5.857e-06  2.996e+03
## model_typeVLM:numerosity_comparison_typesame -1.252e-02  2.471e-03  1.456e+04
## numerosity_comparison_typesame:patch_size    -3.842e-04  7.719e-05  1.502e+04
##                                              t value Pr(>|t|)    
## (Intercept)                                   18.551 9.12e-05 ***
## area_diff                                     -6.099 1.20e-09 ***
## model_typeVLM                                  1.020  0.34547    
## numerosity_comparison_typesame                21.643  < 2e-16 ***
## patch_size                                     0.084  0.93583    
## area_diff:model_typeVLM                        2.593  0.00955 ** 
## model_typeVLM:numerosity_comparison_typesame  -5.066 4.11e-07 ***
## numerosity_comparison_typesame:patch_size     -4.978 6.50e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_dff md_VLM nmrs__ ptch_s a_:_VL m_VLM:
## area_diff   -0.040                                          
## modl_typVLM -0.341  0.030                                   
## nmrsty_cmp_ -0.037  0.161  0.027                            
## patch_size  -0.631 -0.002  0.124  0.010                     
## ar_dff:_VLM  0.009 -0.483 -0.049 -0.045  0.003              
## mdl_tVLM:__  0.015 -0.082 -0.064 -0.419  0.006  0.085       
## nmrsty_c_:_  0.012  0.014  0.012 -0.426 -0.026 -0.010 -0.190

Multiverse analysis

The parameter estimate for the crucial interaction is negative even when other covariates and interactions are excluded.

# Multiverse variant: same final-layer model with the
# `area_diff * model_type` terms left out.
m1 <- lmer(
  cosine_similarity ~
    numerosity_comparison_type * model_type +
    patch_size * numerosity_comparison_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_comparison_type * model_type +  
##     patch_size * numerosity_comparison_type + (1 | image_1) +  
##     (1 | image_2) + (1 | image_type) + (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58165.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9447 -0.4347  0.0175  0.4880  5.2908 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  image_1    (Intercept) 0.004193 0.06475 
##  image_2    (Intercept) 0.003534 0.05945 
##  model_name (Intercept) 0.001054 0.03246 
##  image_type (Intercept) 0.001217 0.03489 
##  Residual               0.001167 0.03415 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                                Estimate Std. Error         df
## (Intercept)                                   8.513e-01  4.163e-02  5.139e+00
## numerosity_comparison_typesame                6.039e-02  2.634e-03  7.638e+03
## model_typeVLM                                 2.657e-02  2.222e-02  6.276e+00
## patch_size                                    1.062e-04  1.498e-03  6.007e+00
## numerosity_comparison_typesame:model_typeVLM -1.371e-02  2.462e-03  1.449e+04
## numerosity_comparison_typesame:patch_size    -3.775e-04  7.724e-05  1.508e+04
##                                              t value Pr(>|t|)    
## (Intercept)                                   20.449 4.03e-06 ***
## numerosity_comparison_typesame                22.929  < 2e-16 ***
## model_typeVLM                                  1.196    0.275    
## patch_size                                     0.071    0.946    
## numerosity_comparison_typesame:model_typeVLM  -5.570 2.59e-08 ***
## numerosity_comparison_typesame:patch_size     -4.887 1.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) nmrs__ md_VLM ptch_s n__:_V
## nmrsty_cmp_ -0.034                            
## modl_typVLM -0.379  0.025                     
## patch_size  -0.701  0.011  0.124              
## nmrs__:_VLM  0.014 -0.416 -0.060  0.006       
## nmrsty_c_:_  0.014 -0.434  0.011 -0.026 -0.190
# Variant of the categorical model: includes area_diff x model_type and leaves
# out the patch_size x numerosity_comparison_type interaction.
# Random intercepts: both images of the pair, image type, and model.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_comparison_type * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## cosine_similarity ~ area_diff * model_type + numerosity_comparison_type *  
##     model_type + (1 | image_1) + (1 | image_2) + (1 | image_type) +  
##     (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58160.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9776 -0.4352  0.0172  0.4853  5.2124 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  image_1    (Intercept) 0.0041814 0.06466 
##  image_2    (Intercept) 0.0035469 0.05956 
##  model_name (Intercept) 0.0009131 0.03022 
##  image_type (Intercept) 0.0020339 0.04510 
##  Residual               0.0011655 0.03414 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                                Estimate Std. Error         df
## (Intercept)                                   8.642e-01  3.548e-02  1.502e+00
## area_diff                                    -2.370e-05  3.936e-06  3.211e+03
## model_typeVLM                                 2.426e-02  2.060e-02  7.430e+00
## numerosity_comparison_typesame                5.207e-02  2.417e-03  4.902e+03
## area_diff:model_typeVLM                       1.499e-05  5.869e-06  2.995e+03
## model_typeVLM:numerosity_comparison_typesame -1.488e-02  2.428e-03  1.488e+04
##                                              t value Pr(>|t|)    
## (Intercept)                                   24.357  0.00623 ** 
## area_diff                                     -6.023 1.91e-09 ***
## model_typeVLM                                  1.178  0.27514    
## numerosity_comparison_typesame                21.547  < 2e-16 ***
## area_diff:model_typeVLM                        2.555  0.01067 *  
## model_typeVLM:numerosity_comparison_typesame  -6.128 9.14e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_dff md_VLM nmrs__ a_:_VL
## area_diff   -0.054                            
## modl_typVLM -0.322  0.033                     
## nmrsty_cmp_ -0.046  0.185  0.039              
## ar_dff:_VLM  0.015 -0.482 -0.054 -0.054       
## mdl_tVLM:__  0.024 -0.081 -0.068 -0.563  0.084
# Minimal categorical model: only numerosity_comparison_type x model_type.
# Both the area_diff x model_type and the
# patch_size x numerosity_comparison_type terms are left out.
m1 <- lmer(
  cosine_similarity ~ numerosity_comparison_type * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_comparison_type * model_type +  
##     (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58169.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9539 -0.4359  0.0158  0.4852  5.2092 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  image_1    (Intercept) 0.0042072 0.06486 
##  image_2    (Intercept) 0.0035537 0.05961 
##  model_name (Intercept) 0.0009029 0.03005 
##  image_type (Intercept) 0.0012163 0.03488 
##  Residual               0.0011674 0.03417 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                                Estimate Std. Error         df
## (Intercept)                                   8.525e-01  2.905e-02  1.857e+00
## numerosity_comparison_typesame                5.481e-02  2.377e-03  5.071e+03
## model_typeVLM                                 2.798e-02  2.045e-02  7.396e+00
## numerosity_comparison_typesame:model_typeVLM -1.602e-02  2.419e-03  1.480e+04
##                                              t value Pr(>|t|)    
## (Intercept)                                   29.346  0.00172 ** 
## numerosity_comparison_typesame                23.063  < 2e-16 ***
## model_typeVLM                                  1.368  0.21146    
## numerosity_comparison_typesame:model_typeVLM  -6.621 3.68e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) nmrs__ md_VLM
## nmrsty_cmp_ -0.045              
## modl_typVLM -0.391  0.035       
## nmrs__:_VLM  0.025 -0.562 -0.064

Continuous

# Cosine similarity against numerosity difference for rows where
# layer == max_layer, with a linear fit; coloured and faceted by model type.
ggplot(
  data = filter(df_hf_models, layer == max_layer),
  mapping = aes(
    x = numerosity_diff,
    y = cosine_similarity,
    color = model_type
  )
) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm") +
  facet_wrap(~model_type) +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Numerosity Difference",
    y = "Cosine Similarity",
    color = ""
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 15),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'

# Full continuous model: numerosity_diff replaces the categorical
# same/different predictor. Includes area_diff x model_type,
# numerosity_diff x model_type, and patch_size x numerosity_diff.
# NOTE(review): image_type has only 2 levels in this subset (see the summary
# output), so its random-intercept variance is estimated from very little
# information -- consider whether a fixed effect would be more appropriate.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type + numerosity_diff * model_type +
    patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ area_diff * model_type + numerosity_diff *  
##     model_type + patch_size * numerosity_diff + (1 | image_1) +  
##     (1 | image_2) + (1 | image_type) + (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58801.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0820 -0.4274  0.0196  0.4852  5.3095 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  image_1    (Intercept) 0.003299 0.05743 
##  image_2    (Intercept) 0.002839 0.05328 
##  model_name (Intercept) 0.001060 0.03256 
##  image_type (Intercept) 0.001385 0.03722 
##  Residual               0.001158 0.03402 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                 Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                    9.197e-01  4.270e-02  4.759e+00  21.540 6.29e-06
## area_diff                     -1.738e-05  3.574e-06  3.317e+03  -4.863 1.21e-06
## model_typeVLM                  1.070e-02  2.224e-02  6.221e+00   0.481  0.64674
## numerosity_diff               -6.628e-03  1.952e-04  7.600e+03 -33.953  < 2e-16
## patch_size                    -2.949e-04  1.503e-03  6.004e+00  -0.196  0.85088
## area_diff:model_typeVLM        1.395e-05  5.313e-06  2.984e+03   2.626  0.00868
## model_typeVLM:numerosity_diff  1.142e-03  1.698e-04  1.579e+04   6.725 1.82e-11
## numerosity_diff:patch_size     4.862e-05  5.771e-06  1.527e+04   8.424  < 2e-16
##                                  
## (Intercept)                   ***
## area_diff                     ***
## model_typeVLM                    
## numerosity_diff               ***
## patch_size                       
## area_diff:model_typeVLM       ** 
## model_typeVLM:numerosity_diff ***
## numerosity_diff:patch_size    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_dff md_VLM nmrst_ ptch_s a_:_VL m_VLM:
## area_diff   -0.032                                          
## modl_typVLM -0.371  0.021                                   
## numrsty_dff -0.015 -0.145  0.015                            
## patch_size  -0.685 -0.002  0.126  0.008                     
## ar_dff:_VLM  0.008 -0.484 -0.038  0.015  0.002              
## mdl_tyVLM:_  0.007  0.072 -0.034 -0.427  0.003 -0.065       
## nmrsty_df:_  0.010 -0.016  0.007 -0.415 -0.018  0.010 -0.216

Multiverse analysis

The parameter estimate for the crucial interaction (`numerosity_diff` × `model_type`) remains positive even when the other covariates and interactions are excluded from the model.

# Multiverse variant: area_diff x model_type is left out;
# numerosity_diff x model_type and patch_size x numerosity_diff are kept.
m1 <- lmer(
  cosine_similarity ~ numerosity_diff * model_type +
    patch_size * numerosity_diff +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_diff * model_type + patch_size *  
##     numerosity_diff + (1 | image_1) + (1 | image_2) + (1 | image_type) +  
##     (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58824.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1102 -0.4268  0.0189  0.4854  5.3055 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  image_1    (Intercept) 0.0032961 0.05741 
##  image_2    (Intercept) 0.0028395 0.05329 
##  model_name (Intercept) 0.0010507 0.03241 
##  image_type (Intercept) 0.0009065 0.03011 
##  Residual               0.0011593 0.03405 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                 Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                    9.133e-01  3.965e-02  5.944e+00  23.033 4.86e-07
## numerosity_diff               -6.761e-03  1.928e-04  7.940e+03 -35.064  < 2e-16
## model_typeVLM                  1.318e-02  2.213e-02  6.204e+00   0.595    0.573
## patch_size                    -3.069e-04  1.496e-03  6.003e+00  -0.205    0.844
## numerosity_diff:model_typeVLM  1.202e-03  1.693e-04  1.575e+04   7.099 1.31e-12
## numerosity_diff:patch_size     4.817e-05  5.775e-06  1.532e+04   8.341  < 2e-16
##                                  
## (Intercept)                   ***
## numerosity_diff               ***
## model_typeVLM                    
## patch_size                       
## numerosity_diff:model_typeVLM ***
## numerosity_diff:patch_size    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) nmrst_ md_VLM ptch_s n_:_VL
## numrsty_dff -0.022                            
## modl_typVLM -0.397  0.016                     
## patch_size  -0.735  0.008  0.126              
## nmrst_:_VLM  0.010 -0.425 -0.037  0.003       
## nmrsty_df:_  0.010 -0.423  0.007 -0.018 -0.215
# Multiverse variant: patch_size x numerosity_diff is left out;
# area_diff x model_type and numerosity_diff x model_type are kept.
m1 <- lmer(
  cosine_similarity ~ area_diff * model_type +
    numerosity_diff * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ area_diff * model_type + numerosity_diff *  
##     model_type + (1 | image_1) + (1 | image_2) + (1 | image_type) +  
##     (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58764.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0672 -0.4317  0.0217  0.4882  5.2036 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  image_1    (Intercept) 0.0033305 0.05771 
##  image_2    (Intercept) 0.0028755 0.05362 
##  model_name (Intercept) 0.0009083 0.03014 
##  image_type (Intercept) 0.0013663 0.03696 
##  Residual               0.0011608 0.03407 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                 Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                    9.150e-01  3.032e-02  1.756e+00  30.172  0.00217
## area_diff                     -1.692e-05  3.590e-06  3.309e+03  -4.714 2.53e-06
## model_typeVLM                  9.550e-03  2.046e-02  7.317e+00   0.467  0.65426
## numerosity_diff               -5.948e-03  1.783e-04  5.182e+03 -33.353  < 2e-16
## area_diff:model_typeVLM        1.371e-05  5.339e-06  2.981e+03   2.569  0.01025
## model_typeVLM:numerosity_diff  1.454e-03  1.662e-04  1.614e+04   8.748  < 2e-16
##                                  
## (Intercept)                   ** 
## area_diff                     ***
## model_typeVLM                    
## numerosity_diff               ***
## area_diff:model_typeVLM       *  
## model_typeVLM:numerosity_diff ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ar_dff md_VLM nmrst_ a_:_VL
## area_diff   -0.046                            
## modl_typVLM -0.375  0.023                     
## numrsty_dff -0.016 -0.167  0.021              
## ar_dff:_VLM  0.013 -0.484 -0.042  0.021       
## mdl_tyVLM:_  0.012  0.070 -0.036 -0.580 -0.064
# Multiverse variant: minimal continuous model with only
# numerosity_diff x model_type (no area_diff or patch_size terms).
m1 <- lmer(
  cosine_similarity ~ numerosity_diff * model_type +
    (1 | image_1) + (1 | image_2) + (1 | image_type) + (1 | model_name),
  data = filter(df_hf_models, layer == max_layer)
)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cosine_similarity ~ numerosity_diff * model_type + (1 | image_1) +  
##     (1 | image_2) + (1 | image_type) + (1 | model_name)
##    Data: filter(df_hf_models, layer == max_layer)
## 
## REML criterion at convergence: -58788.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0949 -0.4329  0.0206  0.4887  5.2007 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  image_1    (Intercept) 0.0033267 0.05768 
##  image_2    (Intercept) 0.0028751 0.05362 
##  model_name (Intercept) 0.0009003 0.03001 
##  image_type (Intercept) 0.0009053 0.03009 
##  Residual               0.0011626 0.03410 
## Number of obs: 17240, groups:  
## image_1, 1974; image_2, 1845; model_name, 9; image_type, 2
## 
## Fixed effects:
##                                 Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                    9.084e-01  2.617e-02  2.174e+00  34.709 0.000504
## numerosity_diff               -6.084e-03  1.754e-04  5.375e+03 -34.677  < 2e-16
## model_typeVLM                  1.201e-02  2.035e-02  7.296e+00   0.590 0.572864
## numerosity_diff:model_typeVLM  1.509e-03  1.657e-04  1.611e+04   9.106  < 2e-16
##                                  
## (Intercept)                   ***
## numerosity_diff               ***
## model_typeVLM                    
## numerosity_diff:model_typeVLM ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) nmrst_ md_VLM
## numrsty_dff -0.029              
## modl_typVLM -0.432  0.023       
## nmrst_:_VLM  0.017 -0.582 -0.039

Accuracy

# Per-model "accuracy" on rows where layer == max_layer.
# For every image_1, average cosine similarity separately over "same" and
# "different" numerosity comparisons; the item counts as accurate when the
# "same" mean exceeds the "different" mean. Accuracy is the share of accurate
# items per model. Items that lack one of the two comparison types yield NA
# and are excluded via na.rm = TRUE.
# TODO: need a better way to calculate this, maybe by item?
df_accuracy <- df_hf_models %>%
  filter(layer == max_layer) %>%
  group_by(model_name2, model_type, numerosity_comparison_type, image_1,
           max_layer, n_params) %>%
  # Explicit .groups: avoids the implicit "drop last group" behaviour (and its
  # message), so the pivot below runs on an ungrouped table.
  summarise(mean_cos_sim = mean(cosine_similarity), .groups = "drop") %>%
  pivot_wider(names_from = numerosity_comparison_type,
              values_from = mean_cos_sim) %>%
  mutate(mean_diff = same - different,
         accurate = mean_diff > 0) %>%
  group_by(model_name2, model_type, max_layer, n_params) %>%
  # Return an ungrouped table so later verbs on df_accuracy are not silently
  # applied within leftover groups.
  summarise(accuracy = mean(accurate, na.rm = TRUE), .groups = "drop")
## `summarise()` has grouped output by 'model_name2', 'model_type',
## 'numerosity_comparison_type', 'image_1', 'max_layer'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'model_name2', 'model_type', 'max_layer'.
## You can override using the `.groups` argument.
# Accuracy against parameter count (log10 x-axis), one labelled point per
# model; colour and shape both encode model type.
ggplot(
  data = df_accuracy,
  mapping = aes(
    x = n_params,
    y = accuracy,
    color = model_type,
    shape = model_type
  )
) +
  geom_point(size = 6, alpha = .9) +
  geom_text_repel(aes(label = model_name2), size = 3) +
  scale_x_log10() +
  scale_color_viridis(option = "mako", discrete = TRUE) +
  labs(
    x = "Parameters",
    y = "Accuracy",
    color = "",
    shape = ""
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 12),
    legend.position = "bottom"
  )